require(stringi)
require(estimatr)

factor.list <- c("group", "attribute", "sphere", "definition")
factor.labels <- c("Group", "Attribute", "Sphere and mechanism", "Definition")
level.labels <- list(c("Advantaged", "Disadvantaged"), 
                     c("Gender", "Nationality", "Sexuality", "Education"), 
                     c("Public (taste-based)", "Public (stereotypical)", 
                       "Public (statistical)", "Public (customer needs)", "Private"), 
                     c("None", "Minimal", "Detailed"))

#### primary survey data (GPT-4) ####
GPT.4.data <- read.csv("GPT_4_data.csv")

## convert ChatGPT responses into numerical outcome values
GPT.4.data$rating <- GPT.4.data$rating.raw <- 
  as.numeric(stri_replace_all_regex(stri_sub(stri_trans_general(GPT.4.data$answer, "Fullwidth-Halfwidth"), 
                                             1, stri_locate_first_regex(GPT.4.data$answer, "理由")[, 1] - 1), 
                                    "\\n| |\\(.+\\)|\\:|【|】|答え", ""))

## manual correction of irregular or incomplete responses
GPT.4.data$answer[which(is.na(GPT.4.data$rating.raw))]

GPT.4.data$rating[which(is.na(GPT.4.data$rating.raw))[1]] <- NA
GPT.4.data$rating[which(is.na(GPT.4.data$rating.raw))[2]] <- 1.5
GPT.4.data$rating[which(is.na(GPT.4.data$rating.raw))[3]] <- 5
GPT.4.data$rating[which(is.na(GPT.4.data$rating.raw))[4]] <- 6
GPT.4.data$rating[which(is.na(GPT.4.data$rating.raw))[5]] <- 4
GPT.4.data$rating[which(is.na(GPT.4.data$rating.raw))[6]] <- 4
GPT.4.data$rating[which(is.na(GPT.4.data$rating.raw))[7]] <- NA
GPT.4.data$rating[which(is.na(GPT.4.data$rating.raw))[8]] <- 5.5
GPT.4.data$rating[which(is.na(GPT.4.data$rating.raw))[9]] <- 5.5
GPT.4.data$rating[which(is.na(GPT.4.data$rating.raw))[10]] <- 3.5
GPT.4.data$rating[which(is.na(GPT.4.data$rating.raw))[11]] <- 3
GPT.4.data$rating[which(is.na(GPT.4.data$rating.raw))[12]] <- 5
GPT.4.data$rating[which(is.na(GPT.4.data$rating.raw))[13]] <- 6
GPT.4.data$rating[which(is.na(GPT.4.data$rating.raw))[14]] <- 6
GPT.4.data$rating[which(is.na(GPT.4.data$rating.raw))[15]] <- 4.5
GPT.4.data$rating[which(is.na(GPT.4.data$rating.raw))[16]] <- NA
GPT.4.data$rating[which(is.na(GPT.4.data$rating.raw))[17]] <- 1.5
GPT.4.data$rating[which(is.na(GPT.4.data$rating.raw))[18]] <- 3.5
GPT.4.data$rating[which(is.na(GPT.4.data$rating.raw))[19]] <- 4

## center outcome values to compute marginal means
GPT.4.data$rating.centered <- GPT.4.data$rating - mean(GPT.4.data$rating, na.rm = TRUE)

## create variables corresponding to experimental conditions
GPT.4.data$attribute <- factor(ifelse(GPT.4.data$Q < 11, "gender", 
                                      ifelse(GPT.4.data$Q < 21, "nationality", 
                                             ifelse(GPT.4.data$Q < 31, "sexuality", "education"))), 
                               levels = c("education", "gender", "nationality", "sexuality"))
GPT.4.data$attribute.id <- ifelse(GPT.4.data$attribute == "education", 4, 
                                  as.numeric(GPT.4.data$attribute) - 1)
GPT.4.data$attribute.reordered <- factor(GPT.4.data$attribute, 
                                         levels = c("gender", "nationality", "sexuality", "education"))
GPT.4.data$disadvantaged <- ifelse(GPT.4.data$attribute %in% c("gender", "education"), 
                                   ifelse(GPT.4.data$Q %% 2 == 1, 1, 0), 
                                   ifelse(GPT.4.data$Q %% 2 == 0, 1, 0))
GPT.4.data$advantaged <- 1 - GPT.4.data$disadvantaged
GPT.4.data$group <- factor(ifelse(GPT.4.data$disadvantaged == 1, 
                                  "disadvantaged", "advantaged"), 
                           levels = c("advantaged", "disadvantaged"))
GPT.4.data$sphere <- factor(ifelse(GPT.4.data$Q %% 10 == 1 | GPT.4.data$Q %% 10 == 2, "taste", 
                                   ifelse(GPT.4.data$Q %% 10 == 3 | GPT.4.data$Q %% 10 == 4, "stereotype", 
                                          ifelse(GPT.4.data$Q %% 10 == 5 | GPT.4.data$Q %% 10 == 6, "statistics", 
                                                 ifelse(GPT.4.data$Q %% 10 == 7 | GPT.4.data$Q %% 10 == 8, "needs", "private")))), 
                            levels = c("private", "taste", "stereotype", "statistics", "needs"))
GPT.4.data$sphere.id <- ifelse(GPT.4.data$sphere == "private", 5, 
                               as.numeric(GPT.4.data$sphere) - 1)
GPT.4.data$sphere.stereotype.ref <- relevel(GPT.4.data$sphere, "stereotype")
GPT.4.data$sphere.statistics.ref <- relevel(GPT.4.data$sphere, "statistics")
GPT.4.data$sphere.needs.ref <- relevel(GPT.4.data$sphere, "needs")
GPT.4.data$sphere.reordered <- factor(GPT.4.data$sphere, 
                                      levels = c("taste", "stereotype", "statistics", "needs", "private"))
GPT.4.data$gender <- (GPT.4.data$attribute == "gender") * 1
GPT.4.data$nationality <- (GPT.4.data$attribute == "nationality") * 1
GPT.4.data$sexuality <- (GPT.4.data$attribute == "sexuality") * 1
GPT.4.data$education <- (GPT.4.data$attribute == "education") * 1
GPT.4.data$taste <- (GPT.4.data$sphere == "taste") * 1
GPT.4.data$stereotype <- (GPT.4.data$sphere == "stereotype") * 1
GPT.4.data$statistics <- (GPT.4.data$sphere == "statistics") * 1
GPT.4.data$needs <- (GPT.4.data$sphere == "needs") * 1
GPT.4.data$private <- (GPT.4.data$sphere == "private") * 1
GPT.4.data$definition <- factor(c("none", "minimal", "detailed")[GPT.4.data$C], 
                                levels = c("none", "minimal", "detailed"))
GPT.4.data$none <- (GPT.4.data$definition == "none") * 1
GPT.4.data$minimal <- (GPT.4.data$definition == "minimal") * 1
GPT.4.data$detailed <- (GPT.4.data$definition == "detailed") * 1
GPT.4.data$definition.ref <- relevel(GPT.4.data$definition, "minimal")

#### additional survey data (GPT-4o at a high-temperature setting) ####
GPT.4o.high.data <- read.csv("GPT_4o_high_data.csv")

## convert ChatGPT responses into numerical outcome values
GPT.4o.high.data$rating <- GPT.4o.high.data$rating.raw <- 
  as.numeric(stri_replace_all_regex(stri_sub(stri_trans_general(GPT.4o.high.data$answer, "Fullwidth-Halfwidth"), 
                                             1, stri_locate_first_regex(GPT.4o.high.data$answer, "理由")[, 1] - 1), 
                                    "\\n| |\\(.+\\)|\\:|【|】|答え", ""))

# no need for manual correction of irregular or incomplete responses
GPT.4o.high.data$answer[which(is.na(GPT.4o.high.data$rating.raw))]

## center outcome values to compute marginal means
GPT.4o.high.data$rating.centered <- GPT.4o.high.data$rating - mean(GPT.4o.high.data$rating, na.rm = TRUE)

## create variables corresponding to experimental conditions
GPT.4o.high.data$attribute <- factor(ifelse(GPT.4o.high.data$Q < 11, "gender", 
                                            ifelse(GPT.4o.high.data$Q < 21, "nationality", 
                                                   ifelse(GPT.4o.high.data$Q < 31, "sexuality", "education"))), 
                                     levels = c("education", "gender", "nationality", "sexuality"))
GPT.4o.high.data$attribute.id <- ifelse(GPT.4o.high.data$attribute == "education", 4, 
                                        as.numeric(GPT.4o.high.data$attribute) - 1)
GPT.4o.high.data$attribute.reordered <- factor(GPT.4o.high.data$attribute, 
                                               levels = c("gender", "nationality", "sexuality", "education"))
GPT.4o.high.data$disadvantaged <- ifelse(GPT.4o.high.data$attribute %in% c("gender", "education"), 
                                         ifelse(GPT.4o.high.data$Q %% 2 == 1, 1, 0), 
                                         ifelse(GPT.4o.high.data$Q %% 2 == 0, 1, 0))
GPT.4o.high.data$advantaged <- 1 - GPT.4o.high.data$disadvantaged
GPT.4o.high.data$group <- factor(ifelse(GPT.4o.high.data$disadvantaged == 1, 
                                        "disadvantaged", "advantaged"), 
                                 levels = c("advantaged", "disadvantaged"))
GPT.4o.high.data$sphere <- factor(ifelse(GPT.4o.high.data$Q %% 10 == 1 | GPT.4o.high.data$Q %% 10 == 2, "taste", 
                                         ifelse(GPT.4o.high.data$Q %% 10 == 3 | GPT.4o.high.data$Q %% 10 == 4, "stereotype", 
                                                ifelse(GPT.4o.high.data$Q %% 10 == 5 | GPT.4o.high.data$Q %% 10 == 6, "statistics", 
                                                       ifelse(GPT.4o.high.data$Q %% 10 == 7 | GPT.4o.high.data$Q %% 10 == 8, "needs", "private")))), 
                                  levels = c("private", "taste", "stereotype", "statistics", "needs"))
GPT.4o.high.data$sphere.id <- ifelse(GPT.4o.high.data$sphere == "private", 5, 
                                     as.numeric(GPT.4o.high.data$sphere) - 1)
GPT.4o.high.data$sphere.stereotype.ref <- relevel(GPT.4o.high.data$sphere, "stereotype")
GPT.4o.high.data$sphere.statistics.ref <- relevel(GPT.4o.high.data$sphere, "statistics")
GPT.4o.high.data$sphere.needs.ref <- relevel(GPT.4o.high.data$sphere, "needs")
GPT.4o.high.data$sphere.reordered <- factor(GPT.4o.high.data$sphere, 
                                            levels = c("taste", "stereotype", "statistics", "needs", "private"))
GPT.4o.high.data$gender <- (GPT.4o.high.data$attribute == "gender") * 1
GPT.4o.high.data$nationality <- (GPT.4o.high.data$attribute == "nationality") * 1
GPT.4o.high.data$sexuality <- (GPT.4o.high.data$attribute == "sexuality") * 1
GPT.4o.high.data$education <- (GPT.4o.high.data$attribute == "education") * 1
GPT.4o.high.data$taste <- (GPT.4o.high.data$sphere == "taste") * 1
GPT.4o.high.data$stereotype <- (GPT.4o.high.data$sphere == "stereotype") * 1
GPT.4o.high.data$statistics <- (GPT.4o.high.data$sphere == "statistics") * 1
GPT.4o.high.data$needs <- (GPT.4o.high.data$sphere == "needs") * 1
GPT.4o.high.data$private <- (GPT.4o.high.data$sphere == "private") * 1
GPT.4o.high.data$definition <- factor(c("none", "minimal", "detailed")[GPT.4o.high.data$C], 
                                      levels = c("none", "minimal", "detailed"))
GPT.4o.high.data$none <- (GPT.4o.high.data$definition == "none") * 1
GPT.4o.high.data$minimal <- (GPT.4o.high.data$definition == "minimal") * 1
GPT.4o.high.data$detailed <- (GPT.4o.high.data$definition == "detailed") * 1
GPT.4o.high.data$definition.ref <- relevel(GPT.4o.high.data$definition, "minimal")

#### additional survey data (GPT-4o at a low-temperature setting) ####
GPT.4o.low.data <- read.csv("GPT_4o_low_data.csv")

## convert ChatGPT responses into numerical outcome values
GPT.4o.low.data$rating <- GPT.4o.low.data$rating.raw <- 
  as.numeric(stri_replace_all_regex(stri_sub(stri_trans_general(GPT.4o.low.data$answer, "Fullwidth-Halfwidth"), 
                                             1, stri_locate_first_regex(GPT.4o.low.data$answer, "理由")[, 1] - 1), 
                                    "\\n| |\\(.+\\)|\\:|【|】|答え", ""))

# no need for manual correction of irregular or incomplete responses
GPT.4o.low.data$answer[which(is.na(GPT.4o.low.data$rating.raw))]

## center outcome values to compute marginal means
GPT.4o.low.data$rating.centered <- GPT.4o.low.data$rating - mean(GPT.4o.low.data$rating, na.rm = TRUE)

## create variables corresponding to experimental conditions
GPT.4o.low.data$attribute <- factor(ifelse(GPT.4o.low.data$Q < 11, "gender", 
                                           ifelse(GPT.4o.low.data$Q < 21, "nationality", 
                                                  ifelse(GPT.4o.low.data$Q < 31, "sexuality", "education"))), 
                                    levels = c("education", "gender", "nationality", "sexuality"))
GPT.4o.low.data$attribute.id <- ifelse(GPT.4o.low.data$attribute == "education", 4, 
                                       as.numeric(GPT.4o.low.data$attribute) - 1)
GPT.4o.low.data$attribute.reordered <- factor(GPT.4o.low.data$attribute, 
                                              levels = c("gender", "nationality", "sexuality", "education"))
GPT.4o.low.data$disadvantaged <- ifelse(GPT.4o.low.data$attribute %in% c("gender", "education"), 
                                        ifelse(GPT.4o.low.data$Q %% 2 == 1, 1, 0), 
                                        ifelse(GPT.4o.low.data$Q %% 2 == 0, 1, 0))
GPT.4o.low.data$advantaged <- 1 - GPT.4o.low.data$disadvantaged
GPT.4o.low.data$group <- factor(ifelse(GPT.4o.low.data$disadvantaged == 1, 
                                       "disadvantaged", "advantaged"), 
                                levels = c("advantaged", "disadvantaged"))
GPT.4o.low.data$sphere <- factor(ifelse(GPT.4o.low.data$Q %% 10 == 1 | GPT.4o.low.data$Q %% 10 == 2, "taste", 
                                        ifelse(GPT.4o.low.data$Q %% 10 == 3 | GPT.4o.low.data$Q %% 10 == 4, "stereotype", 
                                               ifelse(GPT.4o.low.data$Q %% 10 == 5 | GPT.4o.low.data$Q %% 10 == 6, "statistics", 
                                                      ifelse(GPT.4o.low.data$Q %% 10 == 7 | GPT.4o.low.data$Q %% 10 == 8, "needs", "private")))), 
                                 levels = c("private", "taste", "stereotype", "statistics", "needs"))
GPT.4o.low.data$sphere.id <- ifelse(GPT.4o.low.data$sphere == "private", 5, 
                                    as.numeric(GPT.4o.low.data$sphere) - 1)
GPT.4o.low.data$sphere.stereotype.ref <- relevel(GPT.4o.low.data$sphere, "stereotype")
GPT.4o.low.data$sphere.statistics.ref <- relevel(GPT.4o.low.data$sphere, "statistics")
GPT.4o.low.data$sphere.needs.ref <- relevel(GPT.4o.low.data$sphere, "needs")
GPT.4o.low.data$sphere.reordered <- factor(GPT.4o.low.data$sphere, 
                                           levels = c("taste", "stereotype", "statistics", "needs", "private"))
GPT.4o.low.data$gender <- (GPT.4o.low.data$attribute == "gender") * 1
GPT.4o.low.data$nationality <- (GPT.4o.low.data$attribute == "nationality") * 1
GPT.4o.low.data$sexuality <- (GPT.4o.low.data$attribute == "sexuality") * 1
GPT.4o.low.data$education <- (GPT.4o.low.data$attribute == "education") * 1
GPT.4o.low.data$taste <- (GPT.4o.low.data$sphere == "taste") * 1
GPT.4o.low.data$stereotype <- (GPT.4o.low.data$sphere == "stereotype") * 1
GPT.4o.low.data$statistics <- (GPT.4o.low.data$sphere == "statistics") * 1
GPT.4o.low.data$needs <- (GPT.4o.low.data$sphere == "needs") * 1
GPT.4o.low.data$private <- (GPT.4o.low.data$sphere == "private") * 1
GPT.4o.low.data$definition <- factor(c("none", "minimal", "detailed")[GPT.4o.low.data$C], 
                                     levels = c("none", "minimal", "detailed"))
GPT.4o.low.data$none <- (GPT.4o.low.data$definition == "none") * 1
GPT.4o.low.data$minimal <- (GPT.4o.low.data$definition == "minimal") * 1
GPT.4o.low.data$detailed <- (GPT.4o.low.data$definition == "detailed") * 1
GPT.4o.low.data$definition.ref <- relevel(GPT.4o.low.data$definition, "minimal")

#### combined data of GPT-4o ####
GPT.4o.data <- rbind(cbind(GPT.4o.high.data, temperature = "high"), 
                     cbind(GPT.4o.low.data, temperature = "low"))

#### additional survey data (GPT o3mini) ####
OpenAI.o3mini.data <- read.csv("OpenAI_o3mini_data.csv")

## convert ChatGPT responses into numerical outcome values
OpenAI.o3mini.data$rating <- OpenAI.o3mini.data$rating.raw <- 
  as.numeric(stri_replace_all_regex(stri_sub(stri_trans_general(OpenAI.o3mini.data$answer, "Fullwidth-Halfwidth"), 
                                             1, stri_locate_first_regex(OpenAI.o3mini.data$answer, "理由")[, 1] - 1), 
                                    "\\n| |\\(.+\\)|\\:|【|】|答え", ""))

# no need for manual correction of irregular or incomplete responses
OpenAI.o3mini.data$answer[which(is.na(OpenAI.o3mini.data$rating.raw))]

## center outcome values to compute marginal means
OpenAI.o3mini.data$rating.centered <- OpenAI.o3mini.data$rating - mean(OpenAI.o3mini.data$rating, na.rm = TRUE)

## create variables corresponding to experimental conditions
OpenAI.o3mini.data$attribute <- factor(ifelse(OpenAI.o3mini.data$Q < 11, "gender", 
                                              ifelse(OpenAI.o3mini.data$Q < 21, "nationality", 
                                                     ifelse(OpenAI.o3mini.data$Q < 31, "sexuality", "education"))), 
                                       levels = c("education", "gender", "nationality", "sexuality"))
OpenAI.o3mini.data$attribute.id <- ifelse(OpenAI.o3mini.data$attribute == "education", 4, 
                                          as.numeric(OpenAI.o3mini.data$attribute) - 1)
OpenAI.o3mini.data$attribute.reordered <- factor(OpenAI.o3mini.data$attribute, 
                                                 levels = c("gender", "nationality", "sexuality", "education"))
OpenAI.o3mini.data$disadvantaged <- ifelse(OpenAI.o3mini.data$attribute %in% c("gender", "education"), 
                                           ifelse(OpenAI.o3mini.data$Q %% 2 == 1, 1, 0), 
                                           ifelse(OpenAI.o3mini.data$Q %% 2 == 0, 1, 0))
OpenAI.o3mini.data$advantaged <- 1 - OpenAI.o3mini.data$disadvantaged
OpenAI.o3mini.data$group <- factor(ifelse(OpenAI.o3mini.data$disadvantaged == 1, 
                                          "disadvantaged", "advantaged"), 
                                   levels = c("advantaged", "disadvantaged"))
OpenAI.o3mini.data$sphere <- factor(ifelse(OpenAI.o3mini.data$Q %% 10 == 1 | OpenAI.o3mini.data$Q %% 10 == 2, "taste", 
                                           ifelse(OpenAI.o3mini.data$Q %% 10 == 3 | OpenAI.o3mini.data$Q %% 10 == 4, "stereotype", 
                                                  ifelse(OpenAI.o3mini.data$Q %% 10 == 5 | OpenAI.o3mini.data$Q %% 10 == 6, "statistics", 
                                                         ifelse(OpenAI.o3mini.data$Q %% 10 == 7 | OpenAI.o3mini.data$Q %% 10 == 8, "needs", "private")))), 
                                    levels = c("private", "taste", "stereotype", "statistics", "needs"))
OpenAI.o3mini.data$sphere.id <- ifelse(OpenAI.o3mini.data$sphere == "private", 5, 
                                       as.numeric(OpenAI.o3mini.data$sphere) - 1)
OpenAI.o3mini.data$sphere.stereotype.ref <- relevel(OpenAI.o3mini.data$sphere, "stereotype")
OpenAI.o3mini.data$sphere.statistics.ref <- relevel(OpenAI.o3mini.data$sphere, "statistics")
OpenAI.o3mini.data$sphere.needs.ref <- relevel(OpenAI.o3mini.data$sphere, "needs")
OpenAI.o3mini.data$sphere.reordered <- factor(OpenAI.o3mini.data$sphere, 
                                              levels = c("taste", "stereotype", "statistics", "needs", "private"))
OpenAI.o3mini.data$gender <- (OpenAI.o3mini.data$attribute == "gender") * 1
OpenAI.o3mini.data$nationality <- (OpenAI.o3mini.data$attribute == "nationality") * 1
OpenAI.o3mini.data$sexuality <- (OpenAI.o3mini.data$attribute == "sexuality") * 1
OpenAI.o3mini.data$education <- (OpenAI.o3mini.data$attribute == "education") * 1
OpenAI.o3mini.data$taste <- (OpenAI.o3mini.data$sphere == "taste") * 1
OpenAI.o3mini.data$stereotype <- (OpenAI.o3mini.data$sphere == "stereotype") * 1
OpenAI.o3mini.data$statistics <- (OpenAI.o3mini.data$sphere == "statistics") * 1
OpenAI.o3mini.data$needs <- (OpenAI.o3mini.data$sphere == "needs") * 1
OpenAI.o3mini.data$private <- (OpenAI.o3mini.data$sphere == "private") * 1
OpenAI.o3mini.data$definition <- factor(c("none", "minimal", "detailed")[OpenAI.o3mini.data$C], 
                                        levels = c("none", "minimal", "detailed"))
OpenAI.o3mini.data$none <- (OpenAI.o3mini.data$definition == "none") * 1
OpenAI.o3mini.data$minimal <- (OpenAI.o3mini.data$definition == "minimal") * 1
OpenAI.o3mini.data$detailed <- (OpenAI.o3mini.data$definition == "detailed") * 1
OpenAI.o3mini.data$definition.ref <- relevel(OpenAI.o3mini.data$definition, "minimal")

#### distribution of survey ratings ####
# function to compute marginal means
MM.function <- function(data) {
  lm.result <- lm_robust(formula(rating ~ 1), data, clusters = id)
  MM.out <- cbind(mean = lm.result$coefficients, 
                  lower = lm.result$conf.low, 
                  upper = lm.result$conf.high)
  rownames(MM.out) <- names(lm.result$coefficients)
  MM.out
}

# function to compute distributions
dist.function <- function(x) {
  prop.table(table(factor(round(x), levels = 1:6)))
}

## marginal means in the GPT-4 survey
plot.MM.GPT.4 <- matrix(NA, 17, 3)
plot.MM.GPT.4[1, ] <- MM.function(GPT.4.data)
for (i in 1:4) {
  plot.MM.GPT.4[i + 1, ] <- MM.function(subset(GPT.4.data, attribute.id == i & advantaged == 1))
}
for (i in 1:4) {
  plot.MM.GPT.4[i + 5, ] <- MM.function(subset(GPT.4.data, attribute.id == i & disadvantaged == 1))
}
for (i in 1:5) {
  plot.MM.GPT.4[i + 9, ] <- MM.function(subset(GPT.4.data, sphere.id == i))
}
for (i in 1:3) {
  plot.MM.GPT.4[i + 14, ] <- MM.function(subset(GPT.4.data, C == i))
}

## distribution in the GPT-4 survey
plot.dist.GPT.4 <- matrix(NA, 17, 6)
plot.dist.GPT.4[1, ] <- dist.function(GPT.4.data$rating)
for (i in 1:4) {
  plot.dist.GPT.4[i + 1, ] <- dist.function(subset(GPT.4.data, attribute.id == i & advantaged == 1)$rating)
}
for (i in 1:4) {
  plot.dist.GPT.4[i + 5, ] <- dist.function(subset(GPT.4.data, attribute.id == i & disadvantaged == 1)$rating)
}
for (i in 1:5) {
  plot.dist.GPT.4[i + 9, ] <- dist.function(subset(GPT.4.data, sphere.id == i)$rating)
}
for (i in 1:3) {
  plot.dist.GPT.4[i + 14, ] <- dist.function(subset(GPT.4.data, C == i)$rating)
}

round(plot.dist.GPT.4, 2)

# percentage of responses on the 'not discriminatory' side to the customer needs scenario
round(sum(plot.dist.GPT.4[13, 1:3]), 2)

## marginal means in the GPT-4o survey at a high-temperature setting
plot.MM.GPT.4o.high <- matrix(NA, 17, 3)
plot.MM.GPT.4o.high[1, ] <- MM.function(GPT.4o.high.data)
for (i in 1:4) {
  plot.MM.GPT.4o.high[i + 1, ] <- MM.function(subset(GPT.4o.high.data, attribute.id == i & advantaged == 1))
}
for (i in 1:4) {
  plot.MM.GPT.4o.high[i + 5, ] <- MM.function(subset(GPT.4o.high.data, attribute.id == i & disadvantaged == 1))
}
for (i in 1:5) {
  plot.MM.GPT.4o.high[i + 9, ] <- MM.function(subset(GPT.4o.high.data, sphere.id == i))
}
for (i in 1:3) {
  plot.MM.GPT.4o.high[i + 14, ] <- MM.function(subset(GPT.4o.high.data, C == i))
}

## distribution in the GPT-4o survey at a high-temperature setting
plot.dist.GPT.4o.high <- matrix(NA, 17, 6)
plot.dist.GPT.4o.high[1, ] <- dist.function(GPT.4o.high.data$rating)
for (i in 1:4) {
  plot.dist.GPT.4o.high[i + 1, ] <- dist.function(subset(GPT.4o.high.data, attribute.id == i & advantaged == 1)$rating)
}
for (i in 1:4) {
  plot.dist.GPT.4o.high[i + 5, ] <- dist.function(subset(GPT.4o.high.data, attribute.id == i & disadvantaged == 1)$rating)
}
for (i in 1:5) {
  plot.dist.GPT.4o.high[i + 9, ] <- dist.function(subset(GPT.4o.high.data, sphere.id == i)$rating)
}
for (i in 1:3) {
  plot.dist.GPT.4o.high[i + 14, ] <- dist.function(subset(GPT.4o.high.data, C == i)$rating)
}

round(plot.dist.GPT.4o.high, 2)

# percentage of responses on the 'not discriminatory' side to the customer needs scenario
round(sum(plot.dist.GPT.4o.high[13, 1:3]), 2)

## marginal means in the GPT-4o survey at a low-temperature setting
plot.MM.GPT.4o.low <- matrix(NA, 17, 3)
plot.MM.GPT.4o.low[1, ] <- MM.function(GPT.4o.low.data)
for (i in 1:4) {
  plot.MM.GPT.4o.low[i + 1, ] <- MM.function(subset(GPT.4o.low.data, attribute.id == i & advantaged == 1))
}
for (i in 1:4) {
  plot.MM.GPT.4o.low[i + 5, ] <- MM.function(subset(GPT.4o.low.data, attribute.id == i & disadvantaged == 1))
}
for (i in 1:5) {
  plot.MM.GPT.4o.low[i + 9, ] <- MM.function(subset(GPT.4o.low.data, sphere.id == i))
}
for (i in 1:3) {
  plot.MM.GPT.4o.low[i + 14, ] <- MM.function(subset(GPT.4o.low.data, C == i))
}

## distribution in the GPT-4o survey at a low-temperature setting
plot.dist.GPT.4o.low <- matrix(NA, 17, 6)
plot.dist.GPT.4o.low[1, ] <- dist.function(GPT.4o.low.data$rating)
for (i in 1:4) {
  plot.dist.GPT.4o.low[i + 1, ] <- dist.function(subset(GPT.4o.low.data, attribute.id == i & advantaged == 1)$rating)
}
for (i in 1:4) {
  plot.dist.GPT.4o.low[i + 5, ] <- dist.function(subset(GPT.4o.low.data, attribute.id == i & disadvantaged == 1)$rating)
}
for (i in 1:5) {
  plot.dist.GPT.4o.low[i + 9, ] <- dist.function(subset(GPT.4o.low.data, sphere.id == i)$rating)
}
for (i in 1:3) {
  plot.dist.GPT.4o.low[i + 14, ] <- dist.function(subset(GPT.4o.low.data, C == i)$rating)
}

round(plot.dist.GPT.4o.low, 2)

# percentage of responses on the 'not discriminatory' side to the customer needs scenario
round(sum(plot.dist.GPT.4o.low[13, 1:3]), 2)

## marginal means in the GPT o3mini survey
plot.MM.OpenAI.o3mini <- matrix(NA, 17, 3)
plot.MM.OpenAI.o3mini[1, ] <- MM.function(OpenAI.o3mini.data)
for (i in 1:4) {
  plot.MM.OpenAI.o3mini[i + 1, ] <- MM.function(subset(OpenAI.o3mini.data, attribute.id == i & advantaged == 1))
}
for (i in 1:4) {
  plot.MM.OpenAI.o3mini[i + 5, ] <- MM.function(subset(OpenAI.o3mini.data, attribute.id == i & disadvantaged == 1))
}
for (i in 1:5) {
  plot.MM.OpenAI.o3mini[i + 9, ] <- MM.function(subset(OpenAI.o3mini.data, sphere.id == i))
}
for (i in 1:3) {
  plot.MM.OpenAI.o3mini[i + 14, ] <- MM.function(subset(OpenAI.o3mini.data, C == i))
}

## distribution in the OpenAI o3-mini survey
plot.dist.OpenAI.o3mini <- matrix(NA, 17, 6)
plot.dist.OpenAI.o3mini[1, ] <- dist.function(OpenAI.o3mini.data$rating)
for (i in 1:4) {
  plot.dist.OpenAI.o3mini[i + 1, ] <- dist.function(subset(OpenAI.o3mini.data, attribute.id == i & advantaged == 1)$rating)
}
for (i in 1:4) {
  plot.dist.OpenAI.o3mini[i + 5, ] <- dist.function(subset(OpenAI.o3mini.data, attribute.id == i & disadvantaged == 1)$rating)
}
for (i in 1:5) {
  plot.dist.OpenAI.o3mini[i + 9, ] <- dist.function(subset(OpenAI.o3mini.data, sphere.id == i)$rating)
}
for (i in 1:3) {
  plot.dist.OpenAI.o3mini[i + 14, ] <- dist.function(subset(OpenAI.o3mini.data, C == i)$rating)
}

round(plot.dist.OpenAI.o3mini, 2)

# percentage of responses on the 'not discriminatory' side to the customer needs scenario
round(sum(plot.dist.OpenAI.o3mini[13, 1:3]), 2)

## Figures S9, S10, and S11
main.labels <- c("Overall", "Male", "Japanese", "Heterosexuals", "UTokyo grads", 
                 "Female", "Chinese", "Homosexuals", "NEPU grads", 
                 "Public\n(taste-based)", "Public\n(stereotypical)", 
                 "Public\n(statistical)", "Public\n(customer needs)", "Private", 
                 "None", "Minimal", "Detailed")
png("Figure_S9.png", 
    width = 6, height = 6, unit = "in", pointsize = 8, res = 2000)
layout(matrix(c(1, 18:20, 2:14, 21:23, 15:17, 24), 6, 4, byrow = TRUE))
par(mar = c(1.5, 0.5, 2, 0.5), omi = c(0, 1, 0, 0), lwd = 0.5, xpd = TRUE)
for (i in 1:17) {
  plot(NULL, NULL, bty = "n", xlim = c(0.5, 6.5), ylim = c(-1, 1), 
       xlab = "", ylab = "", xaxt = "n", yaxt = "n")
  segments(0.25, 0, 6.75, 0, col = "darkgray")
  segments(0.25, c(-0.8, -0.6, -0.4, -0.2, 0.2, 0.4, 0.6, 0.8), 
           6.75, c(-0.8, -0.6, -0.4, -0.2, 0.2, 0.4, 0.6, 0.8), lty = 3, col = "lightgray")
  segments(1:6, -0.95, 1:6, 0.95, lty = 3, col = "lightgray")
  for (j in 1:6) {
    polygon(c(j - 0.5, j + 0.5, j + 0.5, j - 0.5), 
            c(0, 0, plot.dist.GPT.4o.high[i, j], plot.dist.GPT.4o.high[i, j]), 
            col = "gray")
  }
  segments(plot.MM.GPT.4o.high[i, 2], 0.1, plot.MM.GPT.4o.high[i, 3], 0.1)
  points(plot.MM.GPT.4o.high[i, 1], 0.1, pch = 19, cex = 0.8)
  for (j in 1:6) {
    polygon(c(j - 0.5, j + 0.5, j + 0.5, j - 0.5), 
            c(0, 0, -1 * plot.dist.GPT.4[i, j], -1 * plot.dist.GPT.4[i, j]), 
            col = "white")
  }
  segments(plot.MM.GPT.4[i, 2], -0.1, plot.MM.GPT.4[i, 3], -0.1)
  points(plot.MM.GPT.4[i, 1], -0.1, pch = 21, bg = "white", cex = 0.8)
  mtext(main.labels[i], line = 0.5, font = 2)
  if (i == 1) {
    text(6, plot.dist.GPT.4o.high[1, 6], "4o (high)", pos = 3)
    text(6, -1 * plot.dist.GPT.4[1, 6], "4", pos = 1)
    mtext(c("Not\ndiscriminatory", "Discriminatory"), 
          1, c(0.5, -0.5), at = c(0.5, 6.5), adj = c(0, 1), cex = 0.6)
  }
}
mtext("Overall", 2, 2, outer = TRUE, at = 11 / 12, font = 2, las = 1)
mtext("Group status\nand attribute", 2, 2, outer = TRUE, at = 0.75, font = 2, las = 1)
mtext("Sphere and\nmechanism", 2, 2, outer = TRUE, at = 5 / 12, font = 2, las = 1)
mtext("Definition", 2, 2, outer = TRUE, at = 1 / 12, font = 2, las = 1)
dev.off()

png("Figure_S10.png", 
    width = 6, height = 6, unit = "in", pointsize = 8, res = 2000)
layout(matrix(c(1, 18:20, 2:14, 21:23, 15:17, 24), 6, 4, byrow = TRUE))
par(mar = c(1.5, 0.5, 2, 0.5), omi = c(0, 1, 0, 0), lwd = 0.5, xpd = TRUE)
for (i in 1:17) {
  plot(NULL, NULL, bty = "n", xlim = c(0.5, 6.5), ylim = c(-1, 1), 
       xlab = "", ylab = "", xaxt = "n", yaxt = "n")
  segments(0.25, 0, 6.75, 0, col = "darkgray")
  segments(0.25, c(-0.8, -0.6, -0.4, -0.2, 0.2, 0.4, 0.6, 0.8), 
           6.75, c(-0.8, -0.6, -0.4, -0.2, 0.2, 0.4, 0.6, 0.8), lty = 3, col = "lightgray")
  segments(1:6, -0.95, 1:6, 0.95, lty = 3, col = "lightgray")
  for (j in 1:6) {
    polygon(c(j - 0.5, j + 0.5, j + 0.5, j - 0.5), 
            c(0, 0, plot.dist.GPT.4o.low[i, j], plot.dist.GPT.4o.low[i, j]), 
            col = "gray")
  }
  segments(plot.MM.GPT.4o.low[i, 2], 0.1, plot.MM.GPT.4o.low[i, 3], 0.1)
  points(plot.MM.GPT.4o.low[i, 1], 0.1, pch = 19, cex = 0.8)
  for (j in 1:6) {
    polygon(c(j - 0.5, j + 0.5, j + 0.5, j - 0.5), 
            c(0, 0, -1 * plot.dist.GPT.4[i, j], -1 * plot.dist.GPT.4[i, j]), 
            col = "white")
  }
  segments(plot.MM.GPT.4[i, 2], -0.1, plot.MM.GPT.4[i, 3], -0.1)
  points(plot.MM.GPT.4[i, 1], -0.1, pch = 21, bg = "white", cex = 0.8)
  mtext(main.labels[i], line = 0.5, font = 2)
  if (i == 1) {
    text(6, plot.dist.GPT.4o.low[1, 6], "4o (low)", pos = 3)
    text(6, -1 * plot.dist.GPT.4[1, 6], "4", pos = 1)
    mtext(c("Not\ndiscriminatory", "Discriminatory"), 
          1, c(0.5, -0.5), at = c(0.5, 6.5), adj = c(0, 1), cex = 0.6)
  }
}
mtext("Overall", 2, 2, outer = TRUE, at = 11 / 12, font = 2, las = 1)
mtext("Group status\nand attribute", 2, 2, outer = TRUE, at = 0.75, font = 2, las = 1)
mtext("Sphere and\nmechanism", 2, 2, outer = TRUE, at = 5 / 12, font = 2, las = 1)
mtext("Definition", 2, 2, outer = TRUE, at = 1 / 12, font = 2, las = 1)
dev.off()

png("Figure_S11.png", 
    width = 6, height = 6, unit = "in", pointsize = 8, res = 2000)
layout(matrix(c(1, 18:20, 2:14, 21:23, 15:17, 24), 6, 4, byrow = TRUE))
par(mar = c(1.5, 0.5, 2, 0.5), omi = c(0, 1, 0, 0), lwd = 0.5, xpd = TRUE)
for (i in 1:17) {
  plot(NULL, NULL, bty = "n", xlim = c(0.5, 6.5), ylim = c(-1, 1), 
       xlab = "", ylab = "", xaxt = "n", yaxt = "n")
  segments(0.25, 0, 6.75, 0, col = "darkgray")
  segments(0.25, c(-0.8, -0.6, -0.4, -0.2, 0.2, 0.4, 0.6, 0.8), 
           6.75, c(-0.8, -0.6, -0.4, -0.2, 0.2, 0.4, 0.6, 0.8), lty = 3, col = "lightgray")
  segments(1:6, -0.95, 1:6, 0.95, lty = 3, col = "lightgray")
  for (j in 1:6) {
    polygon(c(j - 0.5, j + 0.5, j + 0.5, j - 0.5), 
            c(0, 0, plot.dist.OpenAI.o3mini[i, j], plot.dist.OpenAI.o3mini[i, j]), 
            col = "gray")
  }
  segments(plot.MM.OpenAI.o3mini[i, 2], 0.1, plot.MM.OpenAI.o3mini[i, 3], 0.1)
  points(plot.MM.OpenAI.o3mini[i, 1], 0.1, pch = 19, cex = 0.8)
  for (j in 1:6) {
    polygon(c(j - 0.5, j + 0.5, j + 0.5, j - 0.5), 
            c(0, 0, -1 * plot.dist.GPT.4[i, j], -1 * plot.dist.GPT.4[i, j]), 
            col = "white")
  }
  segments(plot.MM.GPT.4[i, 2], -0.1, plot.MM.GPT.4[i, 3], -0.1)
  points(plot.MM.GPT.4[i, 1], -0.1, pch = 21, bg = "white", cex = 0.8)
  mtext(main.labels[i], line = 0.5, font = 2)
  if (i == 1) {
    text(6, plot.dist.OpenAI.o3mini[1, 6], "o3-mini", pos = 3)
    text(6, -1 * plot.dist.GPT.4[1, 6], "4", pos = 1)
    mtext(c("Not\ndiscriminatory", "Discriminatory"), 
          1, c(0.5, -0.5), at = c(0.5, 6.5), adj = c(0, 1), cex = 0.6)
  }
}
mtext("Overall", 2, 2, outer = TRUE, at = 11 / 12, font = 2, las = 1)
mtext("Group status\nand attribute", 2, 2, outer = TRUE, at = 0.75, font = 2, las = 1)
mtext("Sphere and\nmechanism", 2, 2, outer = TRUE, at = 5 / 12, font = 2, las = 1)
mtext("Definition", 2, 2, outer = TRUE, at = 1 / 12, font = 2, las = 1)
dev.off()

#### comparison of GPT-4, 4o, and o3-mini ####
## combine the data from GPT-4, 4o, and o3-mini into a single dataset
combined.data <- rbind(cbind(id = GPT.4.data$id, 
                             task = GPT.4.data$id * 10 + GPT.4.data$thread_id, 
                             GPT.4.data[, c("rating", "attribute", "sphere", 
                                            "disadvantaged", "definition")], 
                             model = "4"), 
                       cbind(id = GPT.4o.data$id + 10000, 
                             task = GPT.4o.data$id * 10 + GPT.4o.data$thread_id, 
                             GPT.4o.high.data[, c("rating", "attribute", "sphere", 
                                                  "disadvantaged", "definition")], 
                             model = "4o"), 
                       cbind(id = OpenAI.o3mini.data$id + 20000, 
                             task = OpenAI.o3mini.data$id * 10 + OpenAI.o3mini.data$thread_id, 
                             OpenAI.o3mini.data[, c("rating", "attribute", "sphere", 
                                                    "disadvantaged", "definition")], 
                             model = "o3-mini"))

## regression analysis with model indicator variables
combined.result <- lm_robust(rating ~ model, 
                             combined.data, clusters = id, 
                             fixed_effects = ~ task, se_type = "stata")
round(summary(combined.result)$coefficients[, c(1, 2 ,4)], 3)

# comparison between GPT-4o and OpenAI o3-mini
combined.result.relabeled <- lm_robust(rating ~ I(model == "4") + I(model == "o3-mini"), 
                                       combined.data, clusters = id, 
                                       fixed_effects = ~ task, se_type = "stata")
round(summary(combined.result.relabeled)$coefficients[, c(1, 2 ,4)], 3)

## compute centered marginal means
# function to compute contered marginal means
centered.MM.function <- function(data) {
  lm.result <- lm_robust(formula(rating.centered ~ 1), data, clusters = id)
  MM.out <- cbind(mean = lm.result$coefficients, 
                  lower = lm.result$conf.low, 
                  upper = lm.result$conf.high)
  rownames(MM.out) <- names(lm.result$coefficients)
  MM.out
}

## centered marginal means in the GPT-4 survey
plot.centered.MM.GPT.4 <- matrix(NA, 14, 3)
plot.centered.MM.GPT.4[1, ] <- centered.MM.function(subset(GPT.4.data, advantaged == 1))
plot.centered.MM.GPT.4[2, ] <- centered.MM.function(subset(GPT.4.data, disadvantaged == 1))
for (i in 1:4) {
  plot.centered.MM.GPT.4[i + 2, ] <- centered.MM.function(subset(GPT.4.data, attribute.id == i))
}
for (i in 1:5) {
  plot.centered.MM.GPT.4[i + 6, ] <- centered.MM.function(subset(GPT.4.data, sphere.id == i))
}
for (i in 1:3) {
  plot.centered.MM.GPT.4[i + 11, ] <- centered.MM.function(subset(GPT.4.data, C == i))
}

## centered marginal means in the GPT-4o survey
plot.centered.MM.GPT.4o <- matrix(NA, 14, 3)
plot.centered.MM.GPT.4o[1, ] <- centered.MM.function(subset(GPT.4o.data, advantaged == 1))
plot.centered.MM.GPT.4o[2, ] <- centered.MM.function(subset(GPT.4o.data, disadvantaged == 1))
for (i in 1:4) {
  plot.centered.MM.GPT.4o[i + 2, ] <- centered.MM.function(subset(GPT.4o.data, attribute.id == i))
}
for (i in 1:5) {
  plot.centered.MM.GPT.4o[i + 6, ] <- centered.MM.function(subset(GPT.4o.data, sphere.id == i))
}
for (i in 1:3) {
  plot.centered.MM.GPT.4o[i + 11, ] <- centered.MM.function(subset(GPT.4o.data, C == i))
}

## centered marginal means in the OpenAI o3-mini survey
plot.centered.MM.OpenAI.o3mini <- matrix(NA, 14, 3)
plot.centered.MM.OpenAI.o3mini[1, ] <- centered.MM.function(subset(OpenAI.o3mini.data, advantaged == 1))
plot.centered.MM.OpenAI.o3mini[2, ] <- centered.MM.function(subset(OpenAI.o3mini.data, disadvantaged == 1))
for (i in 1:4) {
  plot.centered.MM.OpenAI.o3mini[i + 2, ] <- centered.MM.function(subset(OpenAI.o3mini.data, attribute.id == i))
}
for (i in 1:5) {
  plot.centered.MM.OpenAI.o3mini[i + 6, ] <- centered.MM.function(subset(OpenAI.o3mini.data, sphere.id == i))
}
for (i in 1:3) {
  plot.centered.MM.OpenAI.o3mini[i + 11, ] <- centered.MM.function(subset(OpenAI.o3mini.data, C == i))
}

## Figure S12
png("Figure_S12.png", 
    width = 4, height = 4, unit = "in", pointsize = 8, res = 2000)
par(mar = c(3.5, 0, 0, 0), lwd = 0.5)
plot(NULL, NULL, bty = "n", xlim = c(-2.5, 0.9), ylim = c(0.5, 18), 
     xlab = "", ylab = "", xaxt = "n", yaxt = "n")
abline(v = 0, col = "darkgray")
abline(v = c(-1.2, -0.9, -0.6, -0.3, 0.3, 0.6, 0.9), 
       lty = 3, col = "darkgray")
segments(-1.25, c(17:16, 14:11, 9:5, 3:1), 
         0.95, c(17:16, 14:11, 9:5, 3:1), 
         lty = 3, col = "darkgray")
text(-2.6, 18, "Group status", pos = 4, font = 2)
text(-2.5, 17:16, level.labels[[1]], pos = 4)
segments(plot.centered.MM.GPT.4[1:2, 2], 17:16 + 0.3, 
         plot.centered.MM.GPT.4[1:2, 3], 17:16 + 0.3)
points(plot.centered.MM.GPT.4[1:2, 1], 17:16 + 0.3, pch = 19)
segments(plot.centered.MM.GPT.4o[1:2, 2], 17:16, 
         plot.centered.MM.GPT.4o[1:2, 3], 17:16)
points(plot.centered.MM.GPT.4o[1:2, 1], 17:16, pch = 15)
segments(plot.centered.MM.OpenAI.o3mini[1:2, 2], 17:16 - 0.3, 
         plot.centered.MM.OpenAI.o3mini[1:2, 3], 17:16 - 0.3)
points(plot.centered.MM.OpenAI.o3mini[1:2, 1], 17:16 - 0.3, pch = 21, bg = "white")
text(-2.6, 15, "Attribute", pos = 4, font = 2)
text(-2.5, 14:11, level.labels[[2]], pos = 4)
segments(plot.centered.MM.GPT.4[3:6, 2], 14:11 + 0.3, 
         plot.centered.MM.GPT.4[3:6, 3], 14:11 + 0.3)
points(plot.centered.MM.GPT.4[3:6, 1], 14:11 + 0.3, pch = 19)
segments(plot.centered.MM.GPT.4o[3:6, 2], 14:11, 
         plot.centered.MM.GPT.4o[3:6, 3], 14:11)
points(plot.centered.MM.GPT.4o[3:6, 1], 14:11, pch = 15)
segments(plot.centered.MM.OpenAI.o3mini[3:6, 2], 14:11 - 0.3, 
         plot.centered.MM.OpenAI.o3mini[3:6, 3], 14:11 - 0.3)
points(plot.centered.MM.OpenAI.o3mini[3:6, 1], 14:11 - 0.3, pch = 21, bg = "white")
text(-2.6, 10, "Sphere and mechanism", pos = 4, font = 2)
text(-2.5, 9:5, level.labels[[3]], pos = 4)
segments(plot.centered.MM.GPT.4[7:11, 2], 9:5 + 0.3, 
         plot.centered.MM.GPT.4[7:11, 3], 9:5 + 0.3)
points(plot.centered.MM.GPT.4[7:11, 1], 9:5 + 0.3, pch = 19)
segments(plot.centered.MM.GPT.4o[7:11, 2], 9:5, 
         plot.centered.MM.GPT.4o[7:11, 3], 9:5)
points(plot.centered.MM.GPT.4o[7:11, 1], 9:5, pch = 15)
segments(plot.centered.MM.OpenAI.o3mini[7:11, 2], 9:5 - 0.3, 
         plot.centered.MM.OpenAI.o3mini[7:11, 3], 9:5 - 0.3)
points(plot.centered.MM.OpenAI.o3mini[7:11, 1], 9:5 - 0.3, pch = 21, bg = "white")
text(-2.6, 4, "Definition", pos = 4, font = 2)
text(-2.5, 3:1, level.labels[[4]], pos = 4)
segments(plot.centered.MM.GPT.4[12:14, 2], 3:1 + 0.3, 
         plot.centered.MM.GPT.4[12:14, 3], 3:1 + 0.3)
points(plot.centered.MM.GPT.4[12:14, 1], 3:1 + 0.3, pch = 19)
segments(plot.centered.MM.GPT.4o[12:14, 2], 3:1, 
         plot.centered.MM.GPT.4o[12:14, 3], 3:1)
points(plot.centered.MM.GPT.4o[12:14, 1], 3:1, pch = 15)
segments(plot.centered.MM.OpenAI.o3mini[12:14, 2], 3:1 - 0.3, 
         plot.centered.MM.OpenAI.o3mini[12:14, 3], 3:1 - 0.3)
points(plot.centered.MM.OpenAI.o3mini[12:14, 1], 3:1 - 0.3, pch = 21, bg = "white")
par(xpd = TRUE)
legend(-2.5, par()$usr[3] + 0.5, legend = c("4", "4o", "o3-mini"), 
       pch = c(19, 15, 21), bg = c(NA, NA, "white"))
axis(1, at = seq(-1.2, 0.9, 0.3), 
     labels = sprintf("%.1f", seq(-1.2, 0.9, 0.3)), lwd = 0.5)
mtext("Centered marginal mean", 1, 2.5, at = -0.3)
dev.off()

#### why OpenAI o3-mini sometimes responded "Almost certainly not discriminatory" ####
OpenAI.o3mini.1.responses <- subset(OpenAI.o3mini.data, rating == 1)

table(OpenAI.o3mini.1.responses$Q, OpenAI.o3mini.1.responses$C)

head(OpenAI.o3mini.1.responses$answer[OpenAI.o3mini.1.responses$Q %in% c(37, 38) & 
                                        OpenAI.o3mini.1.responses$C == 2])

#### comparison between high- and low-temperature settings ####
## overall average
temperature.result <- lm_robust(rating ~ disadvantaged + attribute + sphere + 
                                  definition + temperature, 
                                GPT.4o.data, clusters = id)
round(summary(temperature.result)$coefficients[, c(1, 2 ,4)], 3)

## sensitivity to the factors
F.test.disadvantaged <- anova(lm(rating ~ disadvantaged + attribute + sphere + definition + temperature, 
                                 data = GPT.4o.data), 
                              lm(rating ~ disadvantaged * temperature + attribute + sphere + definition, 
                                 data = GPT.4o.data))
round(F.test.disadvantaged$`Pr(>F)`, 3)
F.test.attribute <- anova(lm(rating ~ disadvantaged + attribute + sphere + definition + temperature, 
                             data = GPT.4o.data), 
                          lm(rating ~ disadvantaged + attribute * temperature + sphere + definition, 
                             data = GPT.4o.data))
round(F.test.attribute$`Pr(>F)`, 3)
F.test.sphere <- anova(lm(rating ~ disadvantaged + attribute + sphere + definition + temperature, 
                          data = GPT.4o.data), 
                       lm(rating ~ disadvantaged + attribute + sphere * temperature + definition, 
                          data = GPT.4o.data))
round(F.test.sphere$`Pr(>F)`, 3)
F.test.definition <- anova(lm(rating ~ disadvantaged + attribute + sphere + definition + temperature, 
                              data = GPT.4o.data), 
                           lm(rating ~ disadvantaged + attribute + sphere + definition * temperature, 
                              data = GPT.4o.data))
round(F.test.definition$`Pr(>F)`, 3)